* This file: ./replication\tables_3_4_5\tables_3_4_5_gsoep.do
	* net install soephelp, replace from(http://companion.soep.de/SOEPhelp/)

* Replication files for Tables 3, 4, and 5 in:
* Iversen, Torben, and Philipp Rehm. Forthcoming. "Information and Financialization: Credit Markets as a New Source of Inequality." Comparative Political Studies.

* Data:
* Tables 3, 4, and 5 use data from the German Socio-Economic Panel Study. Due to data protection regulations, the data cannot be made publicly available. However, the data are available for scientific research free of charge and any researcher can apply for access of the data. For details on how to gain access, see https://www.diw.de/en/diw_01.c.601584.en/data_access.html
* Please contact the SOEP department at DIW Berlin (the dataholder) via soepmail@diw.de for enquiries.
* The results reported in the article can be replicated using the raw data provided by DIW Berlin, once a data access agreement has been signed. The article uses SOEP-Core, v35i (International Version, 95%), doi:10.5684/soep-core.v35i

///////////
// HH level data
//////////
cd D:\E\gsoep\gsoep1984-2018\
cd ./v35_STATA_DE+EN_international\soep.v35.international.stata_dta\

use hgen.dta, clear

gen year=syear

// CPI
* https://data.oecd.org/price/inflation-cpi.htm#indicator-chart
gen cpi=.
replace cpi=48.67	if year==1980
replace cpi=51.76	if year==1981
replace cpi=54.47	if year==1982
replace cpi=56.27	if year==1983
replace cpi=57.62	if year==1984
replace cpi=58.81	if year==1985
replace cpi=58.74	if year==1986
replace cpi=58.88	if year==1987
replace cpi=59.63	if year==1988
replace cpi=61.29	if year==1989
replace cpi=62.94	if year==1990
replace cpi=65.49	if year==1991
replace cpi=68.80	if year==1992
replace cpi=71.88	if year==1993
replace cpi=73.82	if year==1994
replace cpi=75.08	if year==1995
replace cpi=76.17	if year==1996
replace cpi=77.64	if year==1997
replace cpi=78.35	if year==1998
replace cpi=78.81	if year==1999
replace cpi=79.94	if year==2000
replace cpi=81.53	if year==2001
replace cpi=82.69	if year==2002
replace cpi=83.54	if year==2003
replace cpi=84.93	if year==2004
replace cpi=86.25	if year==2005
replace cpi=87.61	if year==2006
replace cpi=89.62	if year==2007
replace cpi=91.98	if year==2008
replace cpi=92.27	if year==2009
replace cpi=93.28	if year==2010
replace cpi=95.22	if year==2011
replace cpi=97.13	if year==2012
replace cpi=98.59	if year==2013
replace cpi=99.49	if year==2014
replace cpi=100.00	if year==2015
replace cpi=100.49	if year==2016
replace cpi=102.01	if year==2017
replace cpi=103.78	if year==2018
replace cpi=105.28	if year==2019
replace cpi=105.81	if year==2020

// Merge in HH weights
cap drop _merge
merge m:1 hid syear using hpathl.dta 
drop if _merge==2
cap drop _merge
clonevar weight=hhrf
clonevar lweight=hbleib

// East vs. West
gen west=(sampreg==1) if inlist(sampreg,1,2)

// home owner
tab hgowner
cap drop owner
gen owner=(hgowner==1) if inlist(hgowner,1,2,3,4)

// owner by purchase
gen owner2=0 if inlist(hgowner,2,3,4)
replace owner2=1 if inlist(hgacquis,1,3)
replace owner2=2 if inlist(hgacquis,2)
bys owner2: tab hgacquis hgowner , m
label define owner2 0 "Doesn't own" 1 "Owner by purchase" 2 "Owner by inheritance", modify
label val owner2 owner2
tab year owner2

tab year owner2 [aw=weight], row nofreq
tab year owner2 if west==1 & inlist(hsample,1,3,5,6,8,10,11) & hpop==1 [aw=weight], row nofreq
tab year owner2 if west==1 & inlist(hsample,1,3,5,6,8,10,11) & hpop==1 [aw=lweight], row nofreq

tab year owner2 if west==1 & inlist(hsample,1,3,5,6) & hpop==1 [aw=weight], row nofreq
tab year owner2 if west==1 & inlist(hsample,1,3,5,6) & hpop==1 [aw=lweight], row nofreq

// HH income
bys hid: gen N=_N
tab hsample N

gen hinc=hghinc if hghinc>0
gen rhinc=hghinc/(cpi/100) if hghinc>0
bys hid:  egen mean_rhinc=mean(rhinc) if N>=5 & N<.
egen hinc_XT3=xtile(mean_rhinc), nq(3) weights(weight)
label var hinc_XT3 "Permanent HH net income tertile"

label define hinc_XT3 ///
	1 "Tertile 1 (poorest)" ///
	2 "Tertile 2" ///
	3 "Tertile 3 (richest)",  modify
label val hinc_XT3 hinc_XT3
tab hinc_XT3

* gen post=0 if inrange(year,2000,2004)
* replace post=1 if inrange(year,2006,2010)
* label define post 0 "2000-04" 1 "2006-10", modify
* label val post post

label define shsample 1 "Sample A" 2 "B" 3 "Sample C" 4 "D" 5 "Sample E" 6 "Sample F" 7 "G" 8 "H" 9 "I" 10 "J" 11 "K" 12 "L1" 13 "L2" 14 "L3" 15 "M1" 16 "M2" 17 "M3" 18 "M4" 19 "M5" 20 "N" 21 "O", modify
label val hsample shsample

label define shsample2 ///
	1 "[A] West German Sample 1984" ///
	3 "[C] East German Sample 1990" ///
	5 "[E] Refresher Sample 1998" ///
	6 "[F] Refresher Sample 2000", modify
label val hsample shsample2

gen cweight=weight*lweight

// Add in P-level information on employment, as well as age and gender
preserve
	use pgen.dta, clear
	isid pid syear
	
	// merge in information about HH-head
	merge m:1 pid syear using pbrutto.dta, keepusing(stell_h)
	gen head1=(stell_h==0) if inrange(stell_h,0,71)
	gen head2=inlist(stell_h,11,12,13) if inrange(stell_h,0,71)
		*[0] Haushaltsvorstand,Bezugsperson 
		*[11] Ehegatte/in
		*[12] gleichgeschl.Partner/in
		*[13] Lebenspartner/in
	
	gen year=syear
	
	gen unemployed=(pglfs==6) if inrange(pglfs,1,12)
		* [-2] trifft nicht zu
		* [-1] keine Angabe
		* [1] Non-working
		* [2] NW-age 65 and older
		* [3] NW-in education-training
		* [4] NW-maternity leave
		* [5] NW-military-community service
		* [6] NW-unemployed
		* [8] NW-but sometimes sec. job
		* [9] NW-work but past 7 days
		* [10] NW-but reg. sec. job
		* [11] Working
		* [12] Working but NW past 7 days

	tabstat unemployed, by(year) f(%4.3f)
	gen max_unemployed=unemployed
	gen my_unemployed=unemployed
	gen head1_unemployed=unemployed if head1==1
	gen head2_unemployed=unemployed if head2==1
	
	* Age and gender, at P-level
	cap drop _merge
	merge m:1 pid using ppath.dta, keepusing(gebjahr sex)
	
	gen age=year-gebjahr if gebjahr>1882
	gen female=(sex==2) if inlist(sex,1,2)

	foreach v in age female {
		gen `v'_my=`v'
		gen `v'_min=`v'
		gen `v'_max=`v'
		gen `v'_head1=`v' if head1==1
		gen `v'_head2=`v' if head2==1
	}
	drop if hid==.
	collapse (mean) *_my my_unemployed  ///
		head*_unemployed *_head* ///
		(min) *_min (max) *_max max_unemployed (p50) age_p50=age, by(hid year)
	
	isid hid year
	tempfile t
	save `t'
restore
cap drop _merge
merge 1:1 hid year using `t'
drop if _merge==2
drop _merge

keep ///
	hid year hsample weight ///
	head?_unemployed owner2 hinc_XT3

* sample
keep if inrange(hsample,1,6) & inrange(year,2000,2010)

cap drop post
gen post=(year>=2005)

* UR, from use ./Data\OECD\unemployment_rate\unemployment_rate.dta
* Source: http://stats.oecd.org/Index.aspx?DataSetCode=ALFS_SUMTAB
preserve
	clear
	input year	UR // YGTT06PC_ST
		1990	 4.82
		1991	 5.57
		1992	 6.62
		1993	 7.87
		1994	 8.40
		1995	 8.13
		1996	 8.86
		1997	 9.82
		1998	 9.20
		1999	 8.41
		2000	 7.76
		2001	 7.84
		2002	 8.65
		2003	 9.64
		2004	 9.79
		2005	11.17
		2006	10.25
		2007	 8.66
		2008	 7.53
		2009	 7.74
		2010	 6.97
		2011	 5.83
		2012	 5.38
		2013	 5.23
		2014	 4.98
		2015	 4.62
		2016	 4.12
		2017	 3.75
		2018	 3.38
		2019	 3.14
	end
	tempfile t
	save `t'
restore
cap drop _merge
merge m:1 year using `t'
drop if _merge==2
cap drop _merge

// Identify HH w/ significant unemployment
gen unemployed=max(head1_unemployed,head2_unemployed)
tab unemployed
gen pre_unemployed=unemployed if inrange(year,2000,2004)
bys hid: egen max_pre_unemployed=max(pre_unemployed)

cap drop homeown?
gen homeown1=inlist(owner2,1,2) if inlist(owner2,0,1,2)
gen homeown2=inlist(owner2,1) if inlist(owner2,0,1)
label list owner2
label var homeown1 "Homeownership (w/ inheritance)"
label var homeown2 "Homeownership (w/o inheritance)"

// Identify owners pre:
foreach v in homeown1 homeown2 {
	* house at least once in 2000-2004
	cap drop pre_`v'
	bys hid: egen pre_`v'=max(`v') if inrange(year,2000,2004)
	tab pre_`v'
	bys hid (pre_`v'): replace pre_`v'=pre_`v'[1]
	tab pre_`v' if inrange(year,2000,2010)
	
	* house for entirety of 2000-2004
	cap drop min_pre_`v'
	bys hid: egen min_pre_`v'=min(`v') if inrange(year,2000,2004)
	tab min_pre_`v'
	bys hid (min_pre_`v'): replace min_pre_`v'=min_pre_`v'[1]
	tab min_pre_`v' if inrange(year,2000,2010)
}
table year unemployed if pre_homeown1==1 & inrange(hsample,1,6) & inrange(year,2000,2010) [aw=weight], cont(mean homeown1) f(%3.2f)
table year unemployed if pre_homeown2==1 & inrange(hsample,1,6) & inrange(year,2000,2010) [aw=weight], cont(mean homeown2) f(%3.2f)
table year unemployed if min_pre_homeown1==1 & inrange(hsample,1,6) & inrange(year,2000,2010) [aw=weight], cont(mean homeown1) f(%3.2f)
table year unemployed if min_pre_homeown2==1 & inrange(hsample,1,6) & inrange(year,2000,2010) [aw=weight], cont(mean homeown2) f(%3.2f)

* Rich vs. poor
gen rich=(hinc_XT3==3) if inlist(hinc_XT3,1,3)
gen poor=(hinc_XT3==1) if inlist(hinc_XT3,1,3)

recode unemployed (0=1) (1=0), gen(emp)
label define emp 1 "Not unempl" 0 "Unempl", modify
label val emp emp

compress
note: .\replication\tables_3_4_5\tables_3_4_5_gsoep.do
note: Created on `= c(current_date)'
cd ..
cd ..
saveold tables_3_4_5_gsoep.dta, version(12)

* use tables_3_4_5_gsoep.dta, clear

cd C:\Dropbox\Rehm\iversen\credit_markets\manuscript\submission_CPS\replication\tables_3_4_5/

// install user-written commands
* ssc install diff
which diff
* ssc install outreg2

**********************************
* Table 3: Homeownership (GSOEP)
**********************************
diff homeown1 [aw=weight], p(post) t(unemployed) cluster(hid) 
outreg2 using table_3.xls, ctitle(`r(depvar)') addstat(Mean control t(0), ///
        r(mean_c0), Mean treated t(0), r(mean_t0), Diff t(0), r(diff0), Mean ///
        control t(1), r(mean_c1), Mean treated t(1), r(mean_t1), Diff t(1), ///
        r(diff1)) label excel keep(_diff) nocons
* ^ this is table 3
diff homeown1 [aw=weight], p(unemployed) t(post) cluster(hid)
* ^ these are the remaining margins for Table 3
* Here some robustness checks:
diff homeown1 [aw=weight], p(post) t(unemployed) cluster(hid) cov(UR)
diff homeown1 [aw=weight], p(post) t(unemployed) cluster(hid) cov(year)
diff homeown1 [aw=weight], p(post) t(unemployed) cluster(hid) cov(UR year)
diff homeown2 [aw=weight], p(post) t(unemployed) cluster(hid) 

* Homeownership: employed vs. unemployed; pre vs. post. Conditional on being homeowner in the pre-period
**********************************
* Table 4: Homeownership (GSOEP), conditional on being homeowner pre-reform
**********************************
diff homeown1 if min_pre_homeown1==1 [aw=weight], p(post) t(unemployed) cluster(hid) 
outreg2 using table_4.xls, ctitle(`r(depvar)') addstat(Mean control t(0), ///
        r(mean_c0), Mean treated t(0), r(mean_t0), Diff t(0), r(diff0), Mean ///
        control t(1), r(mean_c1), Mean treated t(1), r(mean_t1), Diff t(1), ///
        r(diff1)) label excel keep(_diff) nocons
* ^ this is table 4
diff homeown1 if min_pre_homeown1==1 [aw=weight], p(unemployed) t(post) cluster(hid) 
* ^ these are the remaining margins in Table 4
* Some robustness checks:
diff homeown1 if min_pre_homeown1==1 [aw=weight], p(post) t(unemployed) cluster(hid) cov(UR)
diff homeown1 if min_pre_homeown1==1 [aw=weight], p(post) t(unemployed) cluster(hid) cov(year)
diff homeown1 if min_pre_homeown1==1 [aw=weight], p(post) t(unemployed) cluster(hid) cov(UR year)
diff homeown2 if min_pre_homeown2==1 [aw=weight], p(post) t(unemployed) cluster(hid) 
diff homeown1 if pre_homeown1==1 [aw=weight], p(post) t(unemployed) cluster(hid) 
diff homeown2 if pre_homeown2==1 [aw=weight], p(post) t(unemployed) cluster(hid) 


* Homeownership: rich (“Control”) vs. poor (“Treated”); pre vs. post. (Conditional on not being unemployed):
**********************************
* Table 5: Homeownership (GSOEP), rich vs. poor employed
**********************************
diff homeown1 if unemployed==0 [aw=weight], p(post) t(poor) cluster(hid) 
outreg2 using table_5.xls, ctitle(`r(depvar)') addstat(Mean control t(0), ///
        r(mean_c0), Mean treated t(0), r(mean_t0), Diff t(0), r(diff0), Mean ///
        control t(1), r(mean_c1), Mean treated t(1), r(mean_t1), Diff t(1), ///
        r(diff1)) label excel keep(_diff) nocons
* ^ this is table 5
diff homeown1 if unemployed==0 [aw=weight], p(poor) t(post) cluster(hid) 
* ^ these are the remaining margins in table 5
* Robustness
diff homeown1 if unemployed==0 [aw=weight], p(post) t(poor) cluster(hid) cov(UR) 
diff homeown1 if unemployed==0 [aw=weight], p(post) t(poor) cluster(hid) cov(year) 
diff homeown1 if unemployed==0 [aw=weight], p(post) t(poor) cluster(hid) cov(UR year) 
diff homeown2 if unemployed==0 [aw=weight], p(post) t(poor) cluster(hid) 

* Extension: "As in the case of the unemployed, that is mainly true for those who were not already owners before the reform"
diff homeown1 if unemployed==0 & min_pre_homeown1==0 [aw=weight], p(post) t(poor) cluster(hid) 
diff homeown1 if unemployed==0 & min_pre_homeown1==1 [aw=weight], p(post) t(poor) cluster(hid) 
diff homeown2 if unemployed==0 & min_pre_homeown1==0 [aw=weight], p(post) t(poor) cluster(hid) 
diff homeown2 if unemployed==0 & min_pre_homeown1==1 [aw=weight], p(post) t(poor) cluster(hid) 
